#   LibAut
#       onset regressions: data preparation
#   Juraj Medzihorsky
#   2021-08-10

library(parallel)
options(mc.cores=3)
options(stringsAsFactors=F)
source('helpers.R')

#   sessionInfo()
##  R version 3.6.3 (2020-02-29)
##  Platform: x86_64-pc-linux-gnu (64-bit)
##  Running under: Ubuntu 16.04.7 LTS
##
##  Matrix products: default
##  BLAS/LAPACK: /opt/OpenBLAS/lib/libopenblas_haswellp-r0.3.9.so
##
##  locale:
##   [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8
##   [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=en_GB.UTF-8       LC_NAME=C
##   [9] LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
##  attached base packages:
##  [1] parallel  stats     graphics  grDevices utils     datasets  methods   base
##
##  other attached packages:
##  [1] nvimcom_0.9-28 colorout_1.2-0
##
##  loaded via a namespace (and not attached):
##  [1] compiler_3.6.3 tools_3.6.3


#------------------------------------------------------------------------------

code_dir <- getwd()
data_dir <- gsub('code$', 'data', code_dir)


setwd(data_dir)
vdem <- readRDS('V-Dem-CY-Full+Others-v10.rds')
ert <- read.csv('eps.csv')  # episodes with V10 and ERT 2.0

vdem <- subset(vdem, year>=1899)

#-------------------------------------------------------------------------------
#    Exclusive regional EDI

vdem$excl_region_edi <- as.numeric(rep(NA,nrow(vdem)))
for (i in 1:nrow(vdem)) {
    econd1 <- vdem$e_regionpol_6C%in%vdem$e_regionpol_6C[i]
    econd2 <- !(vdem$country_text_id%in%vdem$country_text_id[i])
    vdem$excl_region_edi[i] <- mean(vdem$v2x_polyarchy[econd1&econd2], na.rm=T)
}
gc()

#-------------------------------------------------------------------------------

#   lag reg type by 1 in ert
lag_reg_type <-
    function(i)
    {
        o <- ert$reg_type[(ert$year%in%(ert$year[i]-1))&
                          (ert$country_text_id%in%ert$country_text_id[1])]
        r <- ifelse(length(o)%in%1, o, NA)
        return(r)
    }

ert$lag1_reg_type <- unlist(mclapply(1:nrow(ert), lag_reg_type)) 
gc()

with(ert, table(reg_type, lag1_reg_type, useNA='a'))

#   dem_ep_prch
#       0   not in an ep with dem transition potential
#       1   in an ep with potential for a dem transition

#   dem_ep_ptr: 
#       0   not in an a dem transition episode
#       1   in an dem transition episode

#   not in an episode and in an autocracy
#   first year of a libaut episode

fil0 <- with(ert, (reg_type%in%0)&(dem_ep%in%0))
fil1 <- with(ert, (dem_ep_prch%in%1)&(dem_ep_start_year==year))

#   table(fil0, fil1, useNA='a')

ert$libaut_start <- as.numeric(fil1)
ert$onset_eligible <- as.numeric(fil0|fil1)

#   table(ert$onset_eligible, ert$libaut_start, useNA='a')


merge_vars <- c("country_id", "country_text_id", "country_name", "year")
skip_vars <- c("v2x_regime", "v2x_polyarchy", "v2x_polyarchy_codelow", "v2x_polyarchy_codehigh")

d <- merge(ert, vdem[,!(names(vdem)%in%skip_vars)], by=merge_vars, all.x=T, all.y=T)


#-------------------------------------------------------------------------------

vars_to_lag <-
    c('excl_region_edi',
      'v2x_polyarchy',
      'v2x_regime',
      'reg_type',
      dname('gdp', d),
      dname('pop', d),
      dname('css', d), 
      'v2csprtcpt',
      'v2pepwrsoc',
      'e_miinteco',
      'e_miinterc',
      'v2xnp_regcorr',
      'v2xnp_pres',
      'v2xnp_client',
      'v2x_neopat',
      'v2xps_party',
      'v2xeg_eqdr',
      'e_peedgini')

#   vars_to_lag %in% names(d)
#   vars_to_lag[!(vars_to_lag %in% names(d))]

#   function to lag variables
lagger <-
    function(i, dat=d, v=vars_to_lag) 
    {
        n <- length(v)
        z <- as.data.frame(matrix(rep(NA, n), ncol=n))
        names(z) <- paste0('lag1_', v)
        ii <- which(with(dat, (year%in%(year[i]-1))&(country_text_id%in%country_text_id[i])))
        if (length(ii)==1) { z[,] <- dat[ii,v] }
        return(z)
    }

system.time(ld <- do.call(rbind, mclapply(1:nrow(d), lagger))) ; gc()
ld$lag1_logpop <- log(ld$lag1_e_mipopula) 

dat <- cbind(d, ld)

#-------------------------------------------------------------------------------
#   save

setwd(data_dir)
saveRDS(dat, 'onset_20210810.rds')

#   SCRIPT END